We want real-time inference and have very little data to work with, so we will select a pre-trained MobileNet_V2 as a model for image classification.
import torch
import torchvision
# Download ImageNet-pretrained MobileNetV2 weights — a small, fast backbone
# suited to real-time inference when little training data is available.
mobilenet_v2 = torchvision.models.mobilenet_v2(pretrained=True, progress=True)
# Check whether a GPU is available (the notebook displays the boolean).
torch.cuda.is_available()
Since the objective is image classification using a model that was pre-trained on ImageNet, we will use the following transformations:
from torchvision import transforms as T
from torchvision import datasets
def data_transformers(split: str):
    """Return the torchvision transform pipeline for a dataset split.

    `split` must be 'train' or 'val'; any other value raises KeyError.
    Both pipelines end with tensor conversion (scaling pixels to [0, 1])
    and normalisation with the ImageNet channel statistics, since the
    backbone was pre-trained on ImageNet.
    """
    # ImageNet per-channel mean and standard deviation
    mu = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    # shared tail applied to every split
    common = [T.ToTensor(), T.Normalize(mu, std)]
    stages = {
        # training: random crop + horizontal flip for augmentation
        'train': [T.RandomResizedCrop(224), T.RandomHorizontalFlip()],
        # validation: deterministic resize + centre crop only
        'val': [T.Resize(256), T.CenterCrop(224)],
    }
    return T.Compose(stages[split] + common)
# Build the split-specific pipelines once for the dataset construction below.
train_trans = data_transformers('train')
val_trans = data_transformers('val')
The original dataset was restructured to make use of native PyTorch helper functions. As a result, the labels files are no longer needed and images are placed in the following directory structure:
Due to the way Pytorch reads directories, the classes are assigned differently.
This is significant, because it changes the way we evaluate the model. The binary indicator for non-emergency vehicles is 1, and the assumed positive class is 0 (emergency). This means that if our goal is to maximize the detection of emergency vehicles, the metric of interest will be recall rather than precision.
The model's outputs are logits; once passed through a sigmoid, they represent the estimated probability that an object is a non-emergency vehicle. To convert that probability x into an emergency-vehicle percentage, we can apply (1 - x) * 100.
import os

# Root of the restructured dataset (ImageFolder layout: <split>/<class>/<image>).
data_dir = '/home/ubuntu/datasets/Emergency_Vehicles/dataset_v2/'
batch_size = 16

# Pair each split with its transform pipeline and build datasets/loaders
# in one pass. NOTE(review): shuffle=True on the val loader is unusual —
# presumably intentional so later cells sample random validation images.
split_transforms = {'train': train_trans, 'val': val_trans}
dsets = {split: datasets.ImageFolder(os.path.join(data_dir, split), tfm)
         for split, tfm in split_transforms.items()}
dset_loaders = {split: torch.utils.data.DataLoader(ds, batch_size=batch_size,
                                                   shuffle=True, num_workers=25)
                for split, ds in dsets.items()}
dset_sizes = {split: len(ds) for split, ds in dsets.items()}
dset_classes = dsets['train'].classes

for k, v in dset_sizes.items():
    print(k, v)
print(dset_classes)
print('number of batches in each data loader')
for k, v in dset_loaders.items():
    print(k, len(v))
train 1495 val 151 ['emergency', 'non-emergency'] number of batches in each data loader train 94 val 10
from time import time
import copy
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of `model` when feature extracting.

    With feature_extracting=True all parameters stop accumulating
    gradients (backbone frozen); with False the model is left untouched.
    """
    if not feature_extracting:
        return
    for p in model.parameters():
        p.requires_grad = False
def train_model(model, dataloaders: dict, criterion, optimizer, lr_scheduler, num_epochs=2):
    """Train and validate a single-logit binary classifier, tracking metrics.

    The model must emit one logit per sample (shape (N, 1)); class 1 is
    predicted when the logit is positive, i.e. sigmoid(logit) > 0.5. This
    replaces the original clip-then-compare prediction rule, which compared
    clipped logits in (0, 1) for float equality with integer labels and so
    undercounted correct predictions.

    Parameters
    ----------
    model : torch.nn.Module with a single-logit head
    dataloaders : dict mapping 'train' and 'val' to DataLoaders
    criterion : loss over (logits, targets), e.g. BCEWithLogitsLoss
    optimizer : optimizer over model.parameters()
    lr_scheduler : LR scheduler, stepped once per epoch after training
    num_epochs : number of epochs to run

    Returns
    -------
    (model, history): model reloaded with the best-F1 validation weights,
    history = {'train': {metric: [per-epoch]}, 'val': {...}}.
    """
    since = time()
    # Pick the device locally instead of relying on a module-level global.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    model.to(device)
    metrics = ['loss', 'accuracy', 'precision', 'recall', 'f1']
    val_history = {m: [] for m in metrics}
    train_history = {m: [] for m in metrics}
    best_model = copy.deepcopy(model.state_dict())
    best_acc = 0.0
    best_f1 = 0.0
    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)
        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            tp = fp = tn = fn = 0
            for imgs, labels in dataloaders[phase]:
                imgs = imgs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                # gradients only during the training phase
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(imgs)
                    loss = criterion(outputs.double(), labels.unsqueeze(1).double())
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                # statistics: threshold the logit at 0 (sigmoid 0.5)
                preds = (outputs.detach().squeeze(1) > 0).double()
                truth = labels.double()
                running_loss += loss.item() * imgs.size(0)
                running_corrects += int((preds == truth).sum())
                tp += int(((preds == 1) & (truth == 1)).sum())
                fn += int(((preds == 0) & (truth == 1)).sum())
                fp += int(((preds == 1) & (truth == 0)).sum())
                tn += int(((preds == 0) & (truth == 0)).sum())
            # Step the scheduler ONCE per epoch via the parameter. The
            # original stepped a global `scheduler` after every phase,
            # decaying the LR twice per epoch.
            if phase == 'train':
                lr_scheduler.step()
            n_samples = len(dataloaders[phase].dataset)
            epoch_loss = running_loss / n_samples
            epoch_acc = running_corrects / n_samples
            # guard the ratios against zero denominators on degenerate epochs
            epoch_precision = tp / (tp + fp) if (tp + fp) else 0.0
            epoch_recall = tp / (tp + fn) if (tp + fn) else 0.0
            pr_sum = epoch_precision + epoch_recall
            epoch_f1 = 2 * epoch_precision * epoch_recall / pr_sum if pr_sum else 0.0
            values = [epoch_loss, epoch_acc, epoch_precision, epoch_recall, epoch_f1]
            history = train_history if phase == 'train' else val_history
            for k, v in zip(metrics, values):
                history[k].append(v)
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f} F1: {epoch_f1:.4f}')
            # keep a deep copy of the best validation weights, selected by F1
            if phase == 'val' and epoch_f1 > best_f1:
                best_acc = epoch_acc
                best_f1 = epoch_f1
                best_model = copy.deepcopy(model.state_dict())
        print()
    time_elapsed = time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print(f'Best val Acc: {best_acc:4f} \nBest val F1: {best_f1:4f}')
    # load best model weights
    model.load_state_dict(best_model)
    return model, {'train': train_history, 'val': val_history}
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Replace the 1000-class ImageNet head with a single-logit binary head
# BEFORE creating the optimizer. In the original order the Adam optimizer
# captured the old classifier's parameters, so the new Linear(1280, 1)
# head was never optimized at all.
mobilenet_v2.classifier = torch.nn.Linear(1280, 1)
optimizer = torch.optim.Adam(mobilenet_v2.parameters(), .0001)
scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, gamma = .9)
# single-logit binary loss: combines sigmoid + BCE for numerical stability
criterion = torch.nn.BCEWithLogitsLoss()
outputs = train_model(mobilenet_v2, dset_loaders, criterion, optimizer, scheduler, num_epochs=20)
Epoch 0/19 ---------- train Loss: 0.1613 Acc: 0.8883 F1: 0.9195 val Loss: 0.1486 Acc: 0.9404 F1: 0.9617 Epoch 1/19 ---------- train Loss: 0.0887 Acc: 0.9478 F1: 0.9621 val Loss: 0.1934 Acc: 0.9404 F1: 0.9508 Epoch 2/19 ---------- train Loss: 0.0992 Acc: 0.9505 F1: 0.9648 val Loss: 0.1559 Acc: 0.9404 F1: 0.9556 Epoch 3/19 ---------- train Loss: 0.1024 Acc: 0.9431 F1: 0.9600 val Loss: 0.1431 Acc: 0.9470 F1: 0.9565 Epoch 4/19 ---------- train Loss: 0.0642 Acc: 0.9645 F1: 0.9803 val Loss: 0.1584 Acc: 0.9404 F1: 0.9560 Epoch 5/19 ---------- train Loss: 0.0676 Acc: 0.9619 F1: 0.9733 val Loss: 0.1773 Acc: 0.9536 F1: 0.9617 Epoch 6/19 ---------- train Loss: 0.0709 Acc: 0.9612 F1: 0.9697 val Loss: 0.1511 Acc: 0.9470 F1: 0.9565 Epoch 7/19 ---------- train Loss: 0.0647 Acc: 0.9652 F1: 0.9756 val Loss: 0.1708 Acc: 0.9470 F1: 0.9565 Epoch 8/19 ---------- train Loss: 0.0586 Acc: 0.9679 F1: 0.9785 val Loss: 0.1568 Acc: 0.9470 F1: 0.9617 Epoch 9/19 ---------- train Loss: 0.0580 Acc: 0.9605 F1: 0.9709 val Loss: 0.1569 Acc: 0.9536 F1: 0.9674 Epoch 10/19 ---------- train Loss: 0.0640 Acc: 0.9625 F1: 0.9738 val Loss: 0.1675 Acc: 0.9470 F1: 0.9560 Epoch 11/19 ---------- train Loss: 0.0580 Acc: 0.9672 F1: 0.9756 val Loss: 0.1502 Acc: 0.9536 F1: 0.9670 Epoch 12/19 ---------- train Loss: 0.0583 Acc: 0.9659 F1: 0.9780 val Loss: 0.1637 Acc: 0.9603 F1: 0.9674 Epoch 13/19 ---------- train Loss: 0.0480 Acc: 0.9726 F1: 0.9814 val Loss: 0.1558 Acc: 0.9470 F1: 0.9613 Epoch 14/19 ---------- train Loss: 0.0482 Acc: 0.9712 F1: 0.9786 val Loss: 0.1573 Acc: 0.9536 F1: 0.9670 Epoch 15/19 ---------- train Loss: 0.0541 Acc: 0.9666 F1: 0.9749 val Loss: 0.1674 Acc: 0.9536 F1: 0.9617 Epoch 16/19 ---------- train Loss: 0.0388 Acc: 0.9779 F1: 0.9844 val Loss: 0.1638 Acc: 0.9470 F1: 0.9613 Epoch 17/19 ---------- train Loss: 0.0530 Acc: 0.9686 F1: 0.9786 val Loss: 0.1598 Acc: 0.9470 F1: 0.9613 Epoch 18/19 ---------- train Loss: 0.0613 Acc: 0.9672 F1: 0.9774 val Loss: 0.1661 Acc: 0.9536 F1: 0.9617 Epoch 19/19 
---------- train Loss: 0.0500 Acc: 0.9666 F1: 0.9751 val Loss: 0.1588 Acc: 0.9536 F1: 0.9617 Training complete in 2m 18s Best val Acc: 0.953642 Best val F1: 0.967391
# Persist the fine-tuned weights (train_model returns (model, histories)).
torch.save(outputs[0].state_dict(), './emergency_vehicle_1_mobilenetv2.pth')
# Rebuild the architecture from scratch and reload the trained weights.
# map_location='cpu' lets a checkpoint saved on a GPU load on a CPU-only
# host; without it torch.load raises when CUDA is unavailable. The later
# inference cells feed CPU tensors, so the model belongs on the CPU here.
model = torchvision.models.mobilenet_v2(pretrained=False)
model.classifier = torch.nn.Linear(1280, 1)
model.load_state_dict(torch.load('./emergency_vehicle_1_mobilenetv2.pth', map_location='cpu'))
model.eval()
MobileNetV2(
(features): Sequential(
(0): ConvNormActivation(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(4): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(6): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(7): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(8): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(9): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(10): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(11): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(12): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(13): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(14): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(15): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(16): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(17): InvertedResidual(
(conv): Sequential(
(0): ConvNormActivation(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvNormActivation(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(18): ConvNormActivation(
(0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
)
(classifier): Linear(in_features=1280, out_features=1, bias=True)
)
# Pull one batch from the validation loader (it shuffles, so the batch is
# a random sample) and confirm the expected (N, 3, 224, 224) / (N,) shapes.
imgs, labels = next(iter(dset_loaders['val']))
print(imgs.size(), labels.size())
torch.Size([16, 3, 224, 224]) torch.Size([16])
These randomly selected images are from the validation set. For presentation purposes, I am partially reversing the normalization procedure.
import matplotlib.pyplot as plt
import numpy as np
def inverseNorm(img):
    """Undo the ImageNet normalisation on a (3, H, W) tensor for display.

    Returns a NumPy array; the caller's tensor is left untouched because
    we operate on a detached copy.
    """
    mu = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    # detach + clone so in-place channel updates cannot affect the input
    restored = img.detach().clone()
    # invert (x - mu) / std channel by channel
    for ch, (s, m) in enumerate(zip(std, mu)):
        restored[ch, :, :] = restored[ch, :, :] * s + m
    return restored.numpy()
# Class index -> human-readable label (ImageFolder assigned 0 = emergency).
label_map = {1: 'non-emergency', 0: 'emergency'}
plt.figure(figsize=(20, 20))
# Show the sampled batch in a 4x4 grid, titled with the GROUND-TRUTH label.
for pos, (image, label) in enumerate(zip(imgs, labels), start=1):
    displayable = np.moveaxis(inverseNorm(image), 0, -1)
    plt.subplot(4, 4, pos)
    plt.imshow(displayable)
    plt.title(label_map[int(label.numpy())], fontdict={'fontsize':25})
    plt.axis('off')
The model correctly identified 15 of the 16 images in this validation sample.
# Inference only — no gradient tracking needed.
with torch.no_grad():
    logits = model(imgs)
# A positive logit means sigmoid(logit) > 0.5, i.e. class 1 (non-emergency).
# The original clip-then-compare rule undercounted: clipped logits in
# (0, 1) never compare equal to the integer labels.
preds = (logits > 0).long()
correct = torch.sum(preds.squeeze(1) == labels.data, dim=0)
print(correct)
tensor(15)
# Class index -> human-readable label (ImageFolder assigned 0 = emergency).
label_map = {1:'non-emergency', 0:'emergency'}
plt.figure(figsize=(20,20))
# Show the same batch again, titled with the model's PREDICTED label.
for idx, (im, lab) in enumerate(zip(imgs, preds)):
    im = inverseNorm(im)
    im = np.moveaxis(im, 0, -1)
    plt.subplot(4,4,idx + 1)
    plt.imshow(im)
    # Round rather than truncate: int() floors fractional clipped
    # predictions below 1.0 to 0, mislabeling them 'emergency'.
    plt.title(label_map[int(lab.detach().round().item())], fontdict={'fontsize':25})
    plt.axis('off')